#!/usr/bin/env python
# -*-coding:UTF-8 -*-
'''
@Project ：爬虫-波波老师
@File：15-xpath4k图片.py
@Author ：文非
@Date：2021/3/11 16:29
@Require： Parse the listing page and download its image data: https://pic.netbian.com/4kmingxing/
'''
import os
from os.path import exists
import os

import requests
from lxml import etree

if __name__ == "__main__":
    # Script entry point: fetch one listing page, collect the thumbnail
    # <img src> values found under div.slist, and save each image into a
    # local directory.

    # Imported locally because only this entry block needs it.
    from urllib.parse import urljoin

    # Send a desktop-browser User-Agent with every request.
    headers = {
        'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    }
    # Site root used to resolve the scraped src values into full URLs.
    base_url = "https://pic.netbian.com/"
    # Listing page to scrape.
    url = "https://pic.netbian.com/4kmingxing/"
    # Directory the images are written to.
    save_dir = "./4k图片/明星"
    # Seconds to wait on the server; without a timeout a stalled connection
    # would block the script indefinitely.
    timeout = 10

    # A single session reuses the connection for the page and every image.
    with requests.Session() as session:
        session.headers.update(headers)

        response = session.get(url, timeout=timeout)
        # Stop here on an HTTP error instead of parsing an error page.
        response.raise_for_status()

        # Build the element tree and pull the src attribute of each thumbnail.
        tree = etree.HTML(response.text)
        pic_list = tree.xpath('//div[@class="slist"]/ul/li//a/img/@src')

        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_dir, exist_ok=True)

        for pic in pic_list:
            # urljoin handles src values with or without a leading slash,
            # so the result never contains a doubled "//" in the path.
            get_url = urljoin(base_url, pic)
            # The last path segment of the src value is used as the file name.
            pic_name = pic.split("/")[-1]

            try:
                pic_response = session.get(get_url, timeout=timeout)
                # Do not save an HTTP error body as if it were an image.
                pic_response.raise_for_status()
            except requests.RequestException as err:
                # One failed image should not abort the remaining downloads.
                print(pic_name, get_url, "下载失败", err)
                continue

            path = os.path.join(save_dir, pic_name)
            with open(path, "wb") as fp:
                fp.write(pic_response.content)
            # Report success only after the file has been written and closed.
            print(pic_name, get_url, "下载成功")
    print("************ 爬取完成 *************")

    # Fixing garbled (mojibake) text:
    #     1. Change the encoding of the fetched page to utf-8.



